# --------------------------------------------------------------------------------------
# Title: 
# - False Discovery Rate (FDR) and Bonferroni Correction for Gridded Data

# Objective:
# - Adjust p-values using multiple-testing correction methods, including:
#   - Benjamini-Hochberg (FDR-BH)
#   - Benjamini-Yekutieli (FDR-BY)
#   - Bonferroni correction (FWER control)

# Authors:
# - Oliver Gutiérrez-Hernández: http://orcid.org/0000-0003-2580-5465. E-mail: olivergh@uma.es
# - Luis V. García: http://orcid.org/0000-0002-5514-2941. E-mail: lv.garcia@csic.es

# Source:
# - Gutiérrez-Hernández, O., & García, L.V. (2025). 
#   False Discovery Rate Estimation and Control in Remote Sensing: 
#   Reliable Statistical Significance in Spatially Dependent Gridded Data. 
#   Remote Sensing Letters, 16(5), 537–554. https://doi.org/10.1080/2150704X.2025.2478664

# References:
# - Benjamini, Y., & Hochberg, Y. (1995). “Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing.” 
#   Journal of the Royal Statistical Society: Series B (Methodological), 57(1), 289–300. https://doi.org/10.1111/j.2517-6161.1995.tb02031.x
# - Benjamini, Y., & Yekutieli, D. (2001). "The Control of the False Discovery Rate in Multiple Testing Under Dependency." 
#   The Annals of Statistics, 29(4), 1165–1188. https://doi.org/10.1214/aos/1013699998
# - Bonferroni, C. (1935). "Il calcolo delle assicurazioni su gruppi di teste." 
#   Studi in Onore del Prof. Salvatore Ortu Carboni, 13–60.

# Practical Applications:
# - This code provides a comparative analysis of multiple-testing correction methods rather than advocating for a single approach.
# - The choice of method should align with the data characteristics, with FDR-BH being the preferred option for spatial raster datasets.
# - In most cases, we recommend FDR-BH, as it effectively balances false discovery rate control with statistical power.
# - We do not recommend Bonferroni, as it is overly conservative and significantly reduces statistical power.
# - FDR-BY may be considered in very specific applications, though it is not the usual choice due to its conservative nature.


# Input:
# - **Raster file:** A TIFF image containing p-values to be analyzed.
# - The script reads a raster file (.tif) with p-values and applies multiple-testing corrections.
# - **Requirements for the input raster:**
#   - p-values must range between 0 and 1.
#   - Missing values (e.g., -9999) should be properly handled as NA.

# Outputs:
# 1. **Summary Statistics:**
#    - Total number of valid pixels.
#    - Count and percentage of significant pixels for each correction method.
#
# 2. **Graphical Outputs:**
#    - Binary maps displaying significant results after each correction method.
#
# 3. **File Outputs (if export is enabled):**
#    - Adjusted raster files with FDR-corrected p-values.
#    - Binary maps indicating statistically significant regions.

# --------------------------------------------------------------------------------------
# --- Prerequisites --- # !!! ATTENTION: ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲

# Install necessary packages before running the script.

# Function to check and install a package if not already installed
install_if_missing <- function(package) {
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package, dependencies = TRUE)
  } else {
    cat(paste("Package", package, "is already installed.\n"))
  }
}

# Install 'terra' for handling raster data
install_if_missing("terra")

# --- Load necessary libraries ---
library(terra)   # For handling raster data

# --- Set paths and load data ---
raster_path <- "C:/data/p-values.tif"  # REPLACE WITH YOUR FILE PATH
raster_pvalues <- rast(raster_path)    # Load the raster data using terra

# Extract raster values
p_values <- values(raster_pvalues)  
valid_indices <- !is.na(p_values)  # Identify valid indices

# Define methods for p-value adjustment
methods <- c("none", "BH", "BY", "bonferroni")  # Includes Bonferroni correction

# Create a list to store binary maps
binary_maps <- list()

# Process p-values with each method
for (method in methods) {
  adjusted_p_values <- rep(NA, length(p_values))  # Initialize adjusted p-values
  adjusted_p_values[valid_indices] <- p.adjust(p_values[valid_indices], method = method)
  
  # Generate binary map based on adjusted p-values
  binary_map <- ifelse(adjusted_p_values <= 0.05, 1, 0)
  binary_map[!valid_indices] <- NA  # Set NA for invalid pixels
  
  # Convert to raster and store
  binary_maps[[method]] <- setValues(raster_pvalues, binary_map)
}

# --- Visualisation of binary maps ---
par(mfrow = c(2, 2))  # Adjust layout for multiple maps

for (method in methods) {
  plot(binary_maps[[method]], 
       main = paste("Significant trends (", 
                    ifelse(method == "none", "Unadjusted p-values",  
                    ifelse(method == "BH", "FDR-BH",  
                    ifelse(method == "BY", "FDR-BY", "Bonferroni"))), ")"),  
       col = c("white", "orange"), 
       legend = FALSE)
}

# --- Significant pixel count and percentage for each method ---
total_tests <- sum(valid_indices)  # Total number of hypothesis tests

for (method in methods) {
  significant_count <- sum(values(binary_maps[[method]]) == 1, na.rm = TRUE)
  percentage_significant <- (significant_count / total_tests) * 100
  method_label <- ifelse(method == "none", "Unadjusted p-values", 
                         ifelse(method == "bonferroni", "Bonferroni", method))
  
  cat(paste("Method:", method_label, "- Significant pixels:", significant_count, 
            "(", round(percentage_significant, 2), "% of total tests)\n"))
}
